# -*- coding: utf-8 -*-
import re

import scrapy

from scrapy_zhaohaofang.items import ScrapyZhaohaofangItem
from scrapy_zhaohaofang.spiders.utils.item_util import get_now_date, get_now_year, str_compress_blank
from scrapy_zhaohaofang.spiders.utils.zhaohaofang import ZhfBaseSpider


class QiangWei58Spider(ZhfBaseSpider):
    """Spider that scrapes 蔷薇公寓-brand shared-rental listings from jn.58.com.

    Flow: start_requests -> brand search list page -> one detail page per
    listing -> ScrapyZhaohaofangItem, following "next page" links until done.
    All selectors target 58.com's detail-page layout as of this spider's
    writing; unparsable fields degrade to 0/None instead of crashing where
    that is safe.
    """

    name = "qiangwei"
    allowed_domains = ['jn.58.com']
    brand = "蔷薇公寓"
    bid = 5

    # Browser-like headers shared by every request (previously duplicated
    # three times inline).
    DEFAULT_HEADERS = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/65.0.3325.146 Safari/537.36',
        'Host': 'jn.58.com'
    }

    def start_requests(self):
        """Entry point: start the crawl at the brand's first search page."""
        self.logger.info('爬虫开始，进入首页')
        return [self.begin_crawl(self.parse_list_page)]

    def begin_crawl(self, callback):
        """Build the Request for the first search-result page of this brand.

        :param callback: parse callback to attach to the Request.
        :return: a scrapy.Request for the first list page.
        """
        url_first_page = 'http://jn.58.com/hezu/?key=%s&cmcskey=%s&final=1&jump=1&specialtype=gls' % (
            self.brand, self.brand)
        self.logger.info(url_first_page)
        return scrapy.Request(url_first_page,
                              headers=self.DEFAULT_HEADERS,
                              callback=callback,
                              dont_filter=True)

    def parse_list_page(self, response):
        """Yield one detail-page Request per listing, then follow pagination."""
        apartment_brands = response.css('span.jjr_par_dp::text').extract()
        detail_page_links = response.css('div.des > h2 > a::attr(href)').extract()

        # zip() pairs each listing's brand label with its detail link; the
        # original indexed one list by the other's position, which raised
        # IndexError when the page yielded lists of different lengths.
        for apartment_brand, url in zip(apartment_brands, detail_page_links):
            yield scrapy.Request(url,
                                 headers=self.DEFAULT_HEADERS,
                                 callback=self.parse_detail_page,
                                 dont_filter=True)

        # Follow the "next page" link if one is rendered.
        has_next_page = response.css('a.next > span::text').extract_first()
        if has_next_page:
            url_next_page = response.css('a.next::attr(href)').extract_first()
            referer = 'http://jn.58.com/hezu/?key=%s&cmcskey=%s&final=1&jump=1&specialtype=gls' % (
                self.brand, self.brand)
            # Add a Referer so pagination requests look like normal browsing.
            headers = dict(self.DEFAULT_HEADERS)
            headers['Referer'] = referer
            yield scrapy.Request(
                url_next_page,
                headers=headers,
                callback=self.parse_list_page,
                dont_filter=True)

    def parse(self, response):
        """Unused default callback; parsing goes through explicit callbacks."""
        pass

    def parse_detail_page(self, response):
        """Parse a single listing detail page into a ScrapyZhaohaofangItem."""
        room_title = response.css('h1.c_333::text').extract_first()
        rent_price = response.css('b.f36::text').extract_first()
        rent_pay_style = response.css('div.house-pay-way > span.c_333::text').extract_first()
        lease_style = response.css('ul.f14 > li:nth-child(1) >span:nth-child(2)::text').extract_first()

        # Expected to be 4 space-separated fields, e.g.
        # "<layout> <area> <?> <decoration>" — field index 2 is deliberately
        # skipped, matching the original; TODO confirm its meaning.
        house_kind_str = str_compress_blank(
            response.css('ul.f14 > li:nth-child(2) >span:nth-child(2)::text').extract_first())
        house_parts = house_kind_str.split(" ") if house_kind_str.count(" ") == 3 else None
        house_layout_kind = house_parts[0] if house_parts else None
        lease_area = house_parts[1] if house_parts else None

        # Orientation and floor come as "<face> <at>/<total>".
        house_face_floor_str = str_compress_blank(
            response.css('ul.f14 > li:nth-child(3) >span:nth-child(2)::text').extract_first())
        if house_face_floor_str.count(" ") == 1:
            house_face, house_floor_str = house_face_floor_str.split(" ")
        else:
            house_face, house_floor_str = None, None
        if house_floor_str and "/" in house_floor_str:
            house_at_floor_str, house_total_floor_str = house_floor_str.split("/")[:2]
        else:
            house_at_floor_str, house_total_floor_str = None, None
        house_at_floor = int(house_at_floor_str) if house_at_floor_str and house_at_floor_str.isnumeric() else 0
        # BUG FIX: the original guarded on house_at_floor_str but indexed
        # house_total_floor_str, risking a TypeError; guard on the value used.
        # [1:-1] presumably strips the surrounding '共'/'层' characters — confirm.
        house_total_floor = house_total_floor_str[1:-1] if house_total_floor_str else 0

        community_name = response.css('ul.f14 > li:nth-child(4) >span:nth-child(2) > a::text').extract_first()
        quarter_name = ",".join(response.css('ul.f14 > li:nth-child(5) >span:nth-child(2) > a::text').extract()[0:2])
        # `or ""` keeps a missing address from crashing .strip().
        detail_addr = (response.css(
            'ul.f14 > li:nth-child(6) >span:nth-child(2)::text').extract_first() or "").strip()

        agent_name = response.css('a.c_000::text').extract_first()
        agent_phone = response.css('span.house-chat-txt::text').extract_first()

        key_words = ' '.join(response.css("ul.introduce-item > li > span.a2 em::text").extract())
        # "...前" means a relative timestamp ("x hours ago"); use today's date.
        # Otherwise the page gives "MM-DD" and we prepend the current year.
        publish_time_str = str_compress_blank(response.css('p.house-update-info::text').extract_first()).split(" ")[0]
        publish_time = get_now_date() if "前" in publish_time_str else "".join([get_now_year(), "-", publish_time_str])

        # If the 3rd row of the district list contains digits it is not the
        # company name; fall back to the 2nd row. Raw string fixes the "\d"
        # escape, and the None-guard avoids a TypeError on missing text.
        tenement_company_str = response.css(
            'ul.district-info-list > li:nth-child(3) > span:nth-child(2)::text').extract_first()
        if tenement_company_str and re.search(r"\d", tenement_company_str):
            tenement_company = response.css(
                'ul.district-info-list > li:nth-child(2) > span:nth-child(2)::text').extract_first()
        else:
            tenement_company = tenement_company_str

        url_house_pics = response.css('ul#leftImg > li > img::attr(src)').extract()

        item = ScrapyZhaohaofangItem()
        item["bid"] = self.bid
        item["pic_urls"] = url_house_pics
        item["qrt_name"] = community_name
        item["qrt_addr"] = detail_addr
        item["prt_name"] = tenement_company
        # filter(None, ...) keeps join from crashing when either part is None.
        item["name"] = "".join(filter(None, [community_name, house_layout_kind]))
        item["cbd"] = quarter_name

        # Peel room/hall/toilet counts off a layout string like "2室1厅1卫".
        room_count_array = house_layout_kind.split("室") \
            if house_layout_kind and "室" in house_layout_kind else None
        item["room"] = room_count_array[0] if room_count_array else 0
        hal_count_array = room_count_array[1].split("厅") \
            if room_count_array and "厅" in room_count_array[1] else None
        item["hal"] = hal_count_array[0] if hal_count_array else 0
        tlt_count_array = hal_count_array[1].split("卫") \
            if hal_count_array and "卫" in hal_count_array[1] else None
        item["tlt"] = tlt_count_array[0] if tlt_count_array else 0

        # .get avoids the KeyError the original raised when orientation
        # parsing failed (house_face is None).
        item["ornt"] = self.face_dict.get(house_face)
        item["flr"] = house_at_floor
        item["flrs"] = house_total_floor
        item["kwd"] = key_words
        item["cntr"] = agent_name
        item["cntrt"] = agent_phone
        item["mny"] = rent_price

        if not rent_pay_style:
            item["dpst"] = 0
            item["pay"] = 0
        elif len(rent_pay_style) == 4:
            # e.g. "押一付三": char 1 = deposit count, chars 3+ = months per payment.
            item["dpst"] = self.rent_pay_type_dict[rent_pay_style[1:2]]
            item["pay"] = self.rent_pay_type_dict[rent_pay_style[3:]]
        elif len(rent_pay_style) == 3:
            # 3-char style (e.g. "半年付"): no deposit, six months per payment.
            item["dpst"] = 0
            item["pay"] = 6
        elif len(rent_pay_style) == 2:
            # 2-char style (e.g. "年付"): no deposit, twelve months per payment.
            item["dpst"] = 0
            item["pay"] = 12
        else:
            # BUG FIX: the original left dpst/pay unset here; default to 0
            # like the empty case so downstream consumers see the keys.
            item["dpst"] = 0
            item["pay"] = 0
            self.logger.info("付款方式解析错误, 付款方式抓取的内容是: %s" % rent_pay_style)

        if not lease_style:
            self.logger.info("出租方式解析错误, 出租方式抓取的内容是: %s" % lease_style)
        elif "整租" == lease_style:
            item["rom"] = 0
            item["gdr"] = 0
        elif "合租" in lease_style:
            # Shared rental, e.g. "合租 - <room type> - <gender restriction>";
            # length guard avoids the IndexError the original hit on odd formats.
            lease_parts = lease_style.split(" - ")
            if len(lease_parts) >= 3:
                item["rom"] = self.room_type_dict[lease_parts[1]]
                item["gdr"] = self.gdr_type_dict[lease_parts[2]]
            else:
                self.logger.info("出租方式解析错误, 出租方式抓取的内容是: %s" % lease_style)

        item["area"] = lease_area
        item["ctm"] = publish_time
        item["ttl"] = room_title
        # Downstream requires pic_pid to be present; 0 is the expected placeholder.
        item["pic_pid"] = 0
        yield item

    def closed(self, reason=None):
        """Scrapy shutdown hook: log why the crawl finished."""
        self.logger.info("爬取结束了, 结束原因是: %s " % reason)
